# Colour palette for the figure; entries are picked by position (cccol[i]).
cccol <- c(
  "#CE0013",
  "#FA8072",
  "#32CD32",
  "#7FFFD4",
  "#3A5FCD",
  "#004138",
  "#00CED1",
  "#190246",
  "#EEEE00"
)

############################################################################
#####################         read in data       ###########################
############################################################################

# Per-sample expression table: one row per gene (row names taken from the
# first column), one column per sample.
# NOTE(review): values are presumably log2(FPKM + 1), since they are inverted
# with 2^x - 1 below -- confirm against the file's producer.
logfpkm2nd <- read.table(
  "../data/2nd.reprogramming.lg2.all.fpkm.txt",
  header = TRUE, # spelled out: `T` is an ordinary, reassignable variable
  row.names = 1
)

# Replicate columns to use, in time-course order. Every name has the form
# "<time point>_r<replicate number>".
n_path <- c(
  "hiF_r1", "hiF_r2",
  "he0_r1", "he0_r2",
  "he2_r1", "he2_r2",
  "he6_r1", "he6_r2",
  "n8_r1", "n8_r2", "n8_r3",
  "n12_r1", "n12_r2",
  "n14_r1", "n14_r2", "n14_r3",
  "n20_r1", "n20_r2", "n20_r3",
  "n24p_r1", "n24p_r2",
  "n24m_r1", "n24m_r2",
  "niPS_r1", "niPS_r2"
)
nData_tmp <- logfpkm2nd[, n_path]
# Back to the linear scale so that replicates are averaged before re-logging.
nfpkm2nd <- 2^nData_tmp - 1

# Column names of the averaged matrix and the matching axis labels.
n_time_point <- c("hiF", "he0", "he2", "he6", "n8", "n12", "n14", "n20",
                  "n24pdox", "n24mdox", "niPS")
n_label <- c("hiF-T", "0d", "2d", "6d", "8d", "12d", "14d", "20d",
             "24d+dox", "24d-dox", "niPSC-T")

# Group replicate columns by their name prefix instead of hard-coded column
# index ranges, so the averaging stays correct if n_path is edited.
replicate_group <- sub("_r[0-9]+$", "", n_path)
group_ids <- unique(replicate_group)
# One averaged column per entry of n_time_point, in the same order.
stopifnot(length(group_ids) == length(n_time_point))

# Per-gene mean over the replicates of each time point (genes x time points).
nData2ndfpkm <- do.call(
  cbind,
  lapply(group_ids, function(g) {
    apply(nfpkm2nd[, replicate_group == g, drop = FALSE], 1, mean)
  })
)
colnames(nData2ndfpkm) <- n_time_point
rownames(nData2ndfpkm) <- rownames(nfpkm2nd)
nData <- log2(nData2ndfpkm + 1)

# Keep only the genes listed in the first column of the gene-list file that
# are also present in the expression matrix.
# NOTE(review): presumably the differentially expressed genes from GFOLD
# (judging by the path) -- confirm.
n_deg <- read.table("Gfold/cutoff.0.58/naive.2nd.deg")[, 1]
n_deg <- intersect(n_deg, rownames(nData))
# drop = FALSE keeps a matrix even if only one gene survives the filter.
nData <- nData[n_deg, , drop = FALSE]


############################################################################
#####################      cluster by correlation      #####################
############################################################################
library(amap)
# Number of clusters and RNG seed for the clustering below.
k <- 14
set.seed(4)

# The result of this stats::kmeans() call is overwritten a few lines down, but
# the call is kept on purpose: kmeans() picks random starting centres (see
# ?kmeans), so it consumes random numbers after set.seed(4).
# NOTE(review): removing it would presumably change the random start that
# Kmeans() gets, and with it the cluster numbering hard-coded in
# selected_cluster -- re-derive those ids before dropping this line.
km <- kmeans(nData, k)

# (The original re-derived nData and n_deg here with exactly the same
# statements used when they were first built; that recomputation was a no-op
# and has been removed.)

# Correlation-distance k-means from the amap package; km$cluster maps each
# gene (row name of nData) to a cluster id.
km <- Kmeans(nData, k, method = "correlation")

# Cluster ids chosen for plotting, named by expression pattern.
selected_cluster_name <- c(
  "DOWN-EARLY",
  "DOWN-LATE",
  "UP-EARLY",
  "UP-LATE",
  "TRANSIENT-UP-EARLY",
  "TRANSIENT-UP-MIDDLE",
  "TRANSIENT-UP-LATE"
)
selected_cluster <- setNames(c(10, 11, 4, 5, 8, 6, 2), selected_cluster_name)

# Fig2C: mean expression profiles of the selected gene clusters, drawn as
# three stacked panels. Each profile is a line (mean over the cluster's genes
# at every time point) with a translucent grey confidence band.

# Mean expression per column of `expr` for the rows named in `genes`, plus a
# two-sided (1 - alpha) t-interval for that mean:
#   mean +/- qt(1 - alpha / 2, n - 1) * sd / sqrt(n),  n = number of genes.
# Returns a list with elements `mean`, `lower` and `upper`.
cluster_profile <- function(expr, genes, alpha = 0.05) {
  # drop = FALSE keeps a matrix even when the cluster holds a single gene.
  sub_expr <- expr[genes, , drop = FALSE]
  n_genes <- length(genes)
  centre <- apply(sub_expr, 2, mean)
  # stats::sd is named explicitly and the result is not stored in a variable
  # called `sd`, so the function is never shadowed.
  spread <- apply(sub_expr, 2, stats::sd)
  half_width <- spread / sqrt(n_genes) * qt(1 - alpha / 2, n_genes - 1)
  list(mean = centre, lower = centre - half_width, upper = centre + half_width)
}

# Shade the area between `lower` and `upper` over x = 1..length(upper).
draw_band <- function(lower, upper) {
  x <- seq_along(upper)
  polygon(
    c(x, rev(x)),
    c(upper, rev(lower)),
    col = adjustcolor("grey", alpha.f = 0.3),
    border = NA
  )
}

# Draw one panel: the first series opens the plot and draws the axes, the
# remaining series are overlaid, then the legend is added.
#   panel    list with main, clusters (list of cluster-id vectors), cols,
#            labels, legend_pos and legend_ncol
#   expr     genes x time points expression matrix
#   cluster  named vector mapping gene name -> cluster id
#   x_labels tick labels for the time points
draw_panel <- function(panel, expr, cluster, x_labels) {
  for (i in seq_along(panel$clusters)) {
    # `%in%` drops names, so index names(cluster) rather than using
    # names(which(...)).
    genes <- names(cluster)[cluster %in% panel$clusters[[i]]]
    profile <- cluster_profile(expr, genes)
    if (i == 1L) {
      plot(profile$mean, lwd = 3, type = "l", col = panel$cols[i],
           main = panel$main, ylim = c(0, 4), xlab = NA,
           ylab = "log2(fpkm+1)", xaxt = "n")
      axis(side = 1, seq_along(x_labels), x_labels, las = 2)
      axis(side = 2)
      box()
    } else {
      points(profile$mean, lwd = 3, type = "l", col = panel$cols[i])
    }
    draw_band(profile$lower, profile$upper)
  }
  legend(panel$legend_pos, panel$labels, col = panel$cols, lwd = 3,
         bty = "n", ncol = panel$legend_ncol)
}

# One entry per panel; each element of `clusters` is the set of cluster ids
# pooled into one profile.
panels <- list(
  list(
    main = "Down regulated genes",
    clusters = list(
      selected_cluster[["DOWN-EARLY"]],
      # The DOWN-LATE profile pools the DOWN-LATE cluster with cluster 3.
      # NOTE(review): the original comment said cluster 3 was combined with
      # "cluster10", but the code has always pooled it with DOWN-LATE -- confirm
      # which was intended.
      c(selected_cluster[["DOWN-LATE"]], 3)
    ),
    cols = cccol[3:4],
    labels = c("early somatic", "late somatic"),
    legend_pos = "topright",
    legend_ncol = 1
  ),
  list(
    main = "Up regulated genes",
    clusters = list(
      selected_cluster[["UP-EARLY"]],
      selected_cluster[["UP-LATE"]]
    ),
    cols = cccol[1:2],
    labels = c("early embryogenesis", "pre-implantation"),
    legend_pos = "topleft",
    legend_ncol = 1
  ),
  list(
    main = "Transient up regulated genes",
    clusters = list(
      selected_cluster[["TRANSIENT-UP-EARLY"]],
      selected_cluster[["TRANSIENT-UP-MIDDLE"]],
      selected_cluster[["TRANSIENT-UP-LATE"]]
    ),
    cols = cccol[5:7],
    labels = c("late embryogenesis", "metabolic", "placenta development"),
    legend_pos = "topleft",
    legend_ncol = 2
  )
)

pdf(file = "Fig2C.pdf", width = 4, height = 7)
par(mfrow = c(3, 1))

for (panel in panels) {
  draw_panel(panel, nData, km$cluster, n_label)
}

dev.off()
